#############################################################################
# Evolving Populist Rhetoric: How Public Approval Shapes its Employment
# Last Updated: 2024-04-17
# Purpose: Run the Model
# Version: R version 4.1.1 -- "Kick Things"
############################################################################

# 0. Set ================================================
## 0.1 Setting the Working Directory=====================
rm(list=ls())

## 0.2 Packages ========================================
library(rvest) # The package for web-scrapping
library(dplyr)
library(textcat)
library(cld3)
library(tm)
library(stringr)
library(plyr)
library(gsubfn)
library(httr)
library(quanteda)
library(jsonlite)
library(stm)
library(tibbletime)
library(rio)
library(wordcloud)
library(ggplot2)
library(plotly)
library(lubridate)
library(tidystm)
library(ggpubr)
library(scales) #label_wrap

# 2. Analysis ==================================================================
## 2.1 Import data =============================================================
speeches<-import("replication-data-the populist rhetoric.xlsx")
colnames(speeches)
speeches$time<-as.Date(speeches$time, format="%m/%d/%y")

doc <-speeches %>% group_by(id) %>% summarise(wc_all = sum(wc))
summary(doc$id)
summary(doc$wc_all)
quantile(doc$wc_all, c(.1, .2, .3, .4, .5, .6, .7, .8, .9, .99))

ggplot(aes(x=time, y=approval), data=speeches)+
  geom_line()

ggplot(aes(x=time, y=approval_quantile), data=speeches)+
  geom_line()

## 2.2 Pre-Processing ==========================================================
processed <- textProcessor(speeches$text, metadata = speeches,
                           customstopwords = c("mr","speaker","ladies","gentlemen","good morning",
                                               "good afternoon","good evening",
                                               "(miniszterelnok.hu)"))
out <- prepDocuments(processed$documents, processed$vocab,
                     processed$meta)

out$meta <-out$meta %>% mutate(elec_non_six_mont = case_when(
  time<=ymd("2018-04-08") & time>=ymd("2017-10-08") ~ 1,
  TRUE ~ 0
))

#create corpus
docs<-out$documents
vocab<-out$vocab
meta<-out$meta

## 2.3 Search K ================================================================
# See the appendix for the result. Running this code can take long
# storage <- searchK(docs, vocab, K=c(5,10,15,20,25,30,35,40,45,50), #Can Change
#                    prevalence =~ approval_quantile #Approval Rate
#                    + s(time_diff)                  #Time
#                    +election_window            #Election
#                    +location_domestic, 
#                    max.em.its = 75,
#                    data = meta, init.type = "Spectral", seed=9999, verbose =TRUE)
# 
# print(storage$results)
# options(repr.plot.width=6, repr.plot.height=6)
# 
# #Figure 1
# plot(storage) 
# 
# #exclusivity = words in topic 1 cannot be found in topic 2 (higher better)
# #semantic coherence = words in topic 1 should co-occur within the same document (higher better)
# #held-out = how well does the model predict the held-out data (higher better)
# #residuals = just like OLS (lower better)
# 
# #Which model looks ok in terms of held out and residuals?
# #I think model with 20 ~ 30 topics looks good.
# #Compare exclusivity and semantic coherence among models that looks okay
# 
# model10Prrateby<-stm(documents=out$documents, 
#                      vocab=out$vocab, 
#                      prevalence =~ approval_quantile #Approval Rate
#                      + s(time_diff)                  #Time
#                      +election_window            #Election
#                      +location_domestic,  
#                      K=20, 
#                      data=out$meta, 
#                      init.type = "Spectral", verbose=TRUE, seed=9999)
# model15Prrateby<-stm(documents=out$documents, 
#                      vocab=out$vocab,
#                      prevalence =~ approval_quantile #Approval Rate
#                      + s(time_diff)                  #Time
#                      +election_window            #Election
#                      +location_domestic, 
#                      K=25, 
#                      data=out$meta,
#                      init.type = "Spectral", verbose=TRUE, seed=9999)
# model20Prrateby<-stm(documents=out$documents, 
#                      vocab=out$vocab,
#                      prevalence =~ approval_quantile #Approval Rate
#                      + s(time_diff)                  #Time
#                      +election_window            #Election
#                      +location_domestic, 
#                      K=30,
#                      data=out$meta,
#                      init.type = "Spectral", verbose=TRUE, seed=9999)
# 
# M10ExSem<-as.data.frame(cbind(c(1:20), 
#                               exclusivity(model10Prrateby), 
#                               semanticCoherence(model=model10Prrateby, docs),
#                               "Mod20")) 
# M15ExSem<-as.data.frame(cbind(c(1:25), 
#                               exclusivity(model15Prrateby), 
#                               semanticCoherence(model=model15Prrateby, docs), 
#                               "Mod25")) 
# M20ExSem<-as.data.frame(cbind(c(1:30), 
#                               exclusivity(model20Prrateby), 
#                               semanticCoherence(model=model20Prrateby, docs), 
#                               "Mod30"))
# 
# ModsExSem<-rbind(M10ExSem, M15ExSem, M20ExSem)
# colnames(ModsExSem)<-c("K","Exclusivity", "SemanticCoherence", "Model")
# 
# ModsExSem$Exclusivity<-as.numeric(as.character(ModsExSem$Exclusivity))
# ModsExSem$SemanticCoherence<-as.numeric(as.character(ModsExSem$SemanticCoherence))
# 
# options(repr.plot.width=7, repr.plot.height=7, repr.plot.res=100)
# 
# plotexcoer<-ggplot(ModsExSem, aes(SemanticCoherence, Exclusivity, color = Model))+geom_point(size = 2, alpha = 0.7) + 
#   geom_text(aes(label=K), nudge_x=.05, nudge_y=.05)+
#   labs(x = "Semantic coherence",
#        y = "Exclusivity",
#        title = "Comparing exclusivity and semantic coherence")
# 
# #Figure 2
# plotexcoer
# 
# labelTopics(model10Prrateby) # k=20
# labelTopics(model15Prrateby) # k=25
# labelTopics(model20Prrateby) # k=30
# 
# #Which K is the best?

## 2.4 Topics ==================================================================
  # meta_app_time<-stm(documents=out$documents, 
  #                    vocab=out$vocab,
  #                    prevalence =~ approval_quantile #Approval Rate
  #                    + s(time_diff)                  #Time
  #                    +election_window                #Election
  #                    +location_domestic,             #Location
  #                    K=25, # Need to be changed
  #                    data=out$meta,
  #                    max.em.its = 75,
  #                    init.type = "Spectral", verbose=TRUE, seed=9999)
  #save(meta_app_time, file = "Data/the_populist_rhetoric_mod25.Rdata")
  load("Data/the_populist_rhetoric_mod25.Rdata")
  
  #Topic
  labelTopics(meta_app_time)
  
  #Topic Distribution
  plot(meta_app_time, type = "summary", xlim=c(0,0.3))
  
  a<-meta_app_time$theta   # document-topic distribution
  colnames(a)<-paste0("Topic ", seq(1:25))
  # Multiply all topic columns by 100 and calculate total
  topic.doc <- as.data.frame(a) %>%
    mutate(total = rowSums(., na.rm = TRUE))               # Sum all columns
  topic.doc <- topic.doc*100
  
  # Sum the relevant topics
  relavant_topics <- c(2,8,22,24)
  topic.doc_relevant <- topic.doc[, relavant_topics]
  
  summary(topic.doc)
  lapply(topic.doc, sd)
  
  topic.doc$sum_relevant_topic <- rowSums(topic.doc[, relavant_topics])
  topic.doc <- topic.doc %>% mutate(sum_irrelevant_topic = 100-sum_relevant_topic)
  
  ggplot(aes(x = sum_relevant_topic), data = topic.doc) +
    geom_histogram(binwidth = 5, fill = "grey", color = "black") +
    xlab("Proportion of all relevant topics (%)") +
    ylab("Frequency") +
    scale_x_continuous(breaks = seq(0, 100, by = 5)) +  # Set limits
    theme_minimal()
  
  ggplot(aes(x = sum_relevant_topic), data = topic.doc) +
    geom_histogram(aes(y = after_stat(count / sum(count))), binwidth = 5, fill = "grey", color = "black") +
    xlab("Proportion of all relevant topics (%)") +
    ylab("Proportion") +
    scale_x_continuous(breaks = seq(0, 100, by = 5)) +
    scale_y_continuous(labels = scales::percent) +  # Convert proportions to percentages
    theme_minimal()
  
  mean(topic.doc$sum_relevant_topic)
  
  #Sample Text (Go into the Appendix)
  thoughts2 <- findThoughts(meta_app_time, texts = meta$text, 
                            topics = 2, n = 8)$docs[[1]]
  thoughts8 <- findThoughts(meta_app_time, texts = meta$text, 
                            topics = 8, n = 8)$docs[[1]]
  thoughts22<- findThoughts(meta_app_time, texts = meta$text, 
                            topics = 22, n = 8)$docs[[1]]
  thoughts24<- findThoughts(meta_app_time, texts = meta$text, 
                            topics = 24, n = 8)$docs[[1]]

  #Which topic looks related to the populist rhetoric?
    # Topic 2/ Topic 4/ Topic 22/ Topic 24
    thoughts2
    thoughts8
    thoughts22
    thoughts24
    
    #Word box
    #Figure 3
    par(mfrow=c(1,2), mar = c(0.5,0.5,1,0.5))
    plot.STM(meta_app_time, "labels", topics=c(2,8), #Can incorporate more topics 
             label="f",
             n=15, width=30,
             main = "FREX") #top 15 FREX words per topics
    plot.STM(meta_app_time, "labels", topics=c(2,8), #Can incorporate more topics
             label="prob",
             n=15, width=30,
             main = "High Prob")#top 15 High Prob words per topics
    #Figure 4
    par(mfrow=c(1,2), mar = c(0.5,0.5,1,0.5))
    plot.STM(meta_app_time, "labels", topics=c(22,24), #Can incorporate more topics 
             label="f",
             n=15, width=30,
             main = "FREX")#top 15 FREX words per topics
    plot.STM(meta_app_time, "labels", topics=c(22,24), #Can incorporate more topics
             label="prob",
             n=15, width=30,
             main = "High Prob")#top 15 High Prob words per topics

## 2.5 Estimate Effect =========================================================
  #Populist Topic - 2, 8, 22, 24
  colnames(out$meta)
  out$meta$approval_quantile<-as.numeric(out$meta$approval_quantile)
  out$meta$location_domestic<-as.factor(out$meta$location_domestic)
  
  # prep<-estimateEffect(1:25~ approval_quantile + s(time_diff)+elec_non_two_mont + location_domestic,
  #                      meta_app_time,
  #                      meta=out$meta, uncertainty="Global")
  #save(prep, file = "Data/the_populist_rhetoric_regression.Rdata")
  
  load("Data/the_populist_rhetoric_regression.Rdata")
  summary(prep, topics=c(2,8,22,24), nsim=1000)

### 2.5.1 Time =================================================================
  relavant_topics<-c(2,8,22,24)
  relavant_topics_name <-c("Liberal Democracy Crisis & Christian Democracy",
                           "People-Centrism",
                           "Anti-Immigration",
                           "Roots of Hungarians")
  relevant_com <- data.frame(topic_num = relavant_topics,
                             topic_str = relavant_topics_name)
  
  effect2<-extract.estimateEffect(prep, "time_diff",
                                  model = meta_app_time, method = "continuous") #Num of values is bigger than actual date because of knots
  
  effect2<-effect2 %>% filter(topic %in% relavant_topics) %>% rename(topic_num = topic)
  effect2<-merge(x= effect2, y= relevant_com, by="topic_num", all.x = TRUE)
  effect2<-effect2 %>% mutate(time_diff = floor(covariate.value))
  base_date <-as.Date("2014-06-06")
  
  effect2<-effect2 %>% mutate(time = base_date + days(time_diff))
  effect2$time<-ymd(effect2$time)
  
  # Topic 2
  topic2<- effect2  %>% filter(topic_num==2) %>% 
    ggplot(aes(x = time, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = time, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = time, y = ci.upper), linetype="dashed")+
    xlab("Days")+
    ylab("Expected topic proportions")+
    ggtitle("Liberal Democracy Crisis & Christian Democracy") + 
    scale_x_date(breaks = seq(min(effect2$time), max(effect2$time), by = "1 year"), date_labels = "%Y") +
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  topic8<- effect2  %>% filter(topic_num==8) %>% 
    ggplot(aes(x = time, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = time, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = time, y = ci.upper), linetype="dashed")+
    xlab("Days")+
    ylab("Expected topic proportions")+
    ggtitle("People-Centrism") + 
    scale_x_date(breaks = seq(min(effect2$time), max(effect2$time), by = "1 year"), date_labels = "%Y") +
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  topic22<- effect2  %>% filter(topic_num==22) %>% 
    ggplot(aes(x = time, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = time, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = time, y = ci.upper), linetype="dashed")+
    xlab("Days")+
    ylab("Expected topic proportions")+
    ggtitle("Anti-Immigration") + 
    scale_x_date(breaks = seq(min(effect2$time), max(effect2$time), by = "1 year"), date_labels = "%Y") +
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  topic24<- effect2  %>% filter(topic_num==24) %>% 
    ggplot(aes(x = time, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = time, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = time, y = ci.upper), linetype="dashed")+
    xlab("Days")+
    ylab("Expected topic proportions")+
    ggtitle("Roots of Hungarians") + 
    scale_x_date(breaks = seq(min(effect2$time), max(effect2$time), by = "1 year"), date_labels = "%Y") +
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  fig<- ggarrange(topic2, topic8, topic22, topic24, ncol = 2, nrow = 2)
  
  ggsave(fig, filename = "Figure(s)/time_topic_proportions.jpeg", width = 12, height = 8, dpi = 1000)
  
### 2.5.2 Approval Rate =================================================
  effect<-extract.estimateEffect(prep, "approval_quantile",cov.value1 = "4",cov.value2 = "1", 
                                 model = meta_app_time, method = "difference") #Prob Rightt-wing compared to Left-wing (Positive = Right-wing)
  
  effect<-effect %>% filter(topic %in% relavant_topics) %>% rename(topic_num = topic)
  effect<-merge(x= effect, y= relevant_com, by="topic_num", all.x = TRUE)
  effect<-effect %>% arrange(topic_num)
  effect$topic_str <- factor(effect$topic_str, levels = unique(effect$topic_str))
  
  fig<- effect %>%
    ggplot(aes(x = estimate, y = reorder(topic_str, -topic_num), xmin = ci.lower, xmax = ci.upper)) +
    geom_point(size = 1.5) +
    geom_errorbarh(height = 0.1) +
    xlab("")+
    ylab("")+
    geom_vline(xintercept = 0, color = "red") +
    scale_y_discrete(labels = label_wrap(30)) +
    theme_bw()+
    theme(axis.text.y = element_text(size = 12))
  
  ggsave(fig, filename = "Figure(s)/approval_topic_proportions.jpeg", width = 12, height = 8, dpi = 1000)
  
  #Prediction
  prediction<-extract.estimateEffect(prep, "approval_quantile", 
                                 model = meta_app_time, method = "continuous")
  prediction<-prediction %>% filter(covariate.value %in% c(1,4))
  prediction<-prediction %>% filter(topic %in% relavant_topics) %>% rename(topic_num = topic)
  prediction<-merge(x= prediction, y= relevant_com, by="topic_num", all.x = TRUE)
  prediction<-prediction %>% arrange(topic_num)
  
  topic2<- prediction  %>% filter(topic_num==2) %>% 
    ggplot(aes(x = covariate.value, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = covariate.value, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = covariate.value, y = ci.upper), linetype="dashed")+
    xlab("Approval rate")+
    ylab("Expected topic proportions")+
    ggtitle("Liberal Democracy Crisis & Christian Democracy") + 
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  topic8<- prediction  %>% filter(topic_num==8) %>% 
    ggplot(aes(x = covariate.value, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = covariate.value, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = covariate.value, y = ci.upper), linetype="dashed")+
    xlab("Approval rate")+
    ylab("Expected topic proportions")+
    ggtitle("People-Centrism") + 
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  topic22<- prediction  %>% filter(topic_num==22) %>% 
    ggplot(aes(x = covariate.value, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = covariate.value, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = covariate.value, y = ci.upper), linetype="dashed")+
    xlab("Approval rate")+
    ylab("Expected topic proportions")+
    ggtitle("Anti-Immigration") + 
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  topic24<- prediction  %>% filter(topic_num==24) %>% 
    ggplot(aes(x = covariate.value, y = estimate, ymin = ci.lower, ymax = ci.upper))+
    geom_line(color = "red") +
    geom_line(aes(x = covariate.value, y = ci.lower), linetype="dashed")+
    geom_line(aes(x = covariate.value, y = ci.upper), linetype="dashed")+
    xlab("Approval rate")+
    ylab("Expected topic proportions")+
    ggtitle("Roots of Hungarians") + 
    #facet_wrap(~ reorder(topic_str, topic_group_num), ncol = 2) +
    theme_bw()+
    theme(plot.title = element_text(hjust = 0.5))
  
  ggarrange(topic2, topic8, topic22, topic24, ncol = 2, nrow = 2)
  
### 2.5.4 Election Window_factor================================================
  effect_election<-extract.estimateEffect(prep, "elec_non_two_mont",cov.value1 = "1",cov.value2 = "0", 
                                 model = meta_app_time, method = "difference") 
  
  effect_election<-effect_election %>% filter(topic %in% relavant_topics) %>% rename(topic_num = topic)
  effect_election<-merge(x= effect_election, y= relevant_com, by="topic_num", all.x = TRUE)
  effect_election<-effect_election %>% arrange(topic_num)
  effect_election$topic_str <- factor(effect_election$topic_str, levels = unique(effect_election$topic_str))
  effect_election<-effect_election %>% mutate(type = "Two-month electoral period")
  
  prep_one<-estimateEffect(1:25~ approval_quantile + s(time_diff)+elec_non_one_mont+location_domestic, 
                           meta_app_time, 
                           meta=out$meta, uncertainty="Global")
  
  effect_election2<-extract.estimateEffect(prep_one, "elec_non_one_mont",cov.value1 = "1",cov.value2 = "0", 
                                          model = meta_app_time, method = "difference") 
  effect_election2<-effect_election2 %>% filter(topic %in% relavant_topics) %>% rename(topic_num = topic)
  effect_election2<-merge(x= effect_election2, y= relevant_com, by="topic_num", all.x = TRUE)
  effect_election2<-effect_election2 %>% arrange(topic_num)
  effect_election2$topic_str <- factor(effect_election2$topic_str, levels = unique(effect_election2$topic_str))
  effect_election2<-effect_election2 %>% mutate(type = "One-month electoral period")
  
  # prep_six<-estimateEffect(1:25~ approval_quantile + s(time_diff)+elec_non_six_mont+location_domestic, 
  #                          meta_app_time, 
  #                          meta=out$meta, uncertainty="Global")
  # 
  # effect_election6<-extract.estimateEffect(prep_six, "elec_non_six_mont",cov.value1 = "1",cov.value2 = "0", 
  #                                          model = meta_app_time, method = "difference") 
  # effect_election6<-effect_election6 %>% filter(topic %in% relavant_topics) %>% rename(topic_num = topic)
  # effect_election6<-merge(x= effect_election6, y= relevant_com, by="topic_num", all.x = TRUE)
  # effect_election6<-effect_election6 %>% arrange(topic_num)
  # effect_election6$topic_str <- factor(effect_election6$topic_str, levels = unique(effect_election6$topic_str))
  # effect_election6<-effect_election6 %>% mutate(type = "six-month electoral period")
  
  effect_election<-rbind(effect_election, effect_election2)
  
  fig<-effect_election %>%
    ggplot(aes(x = estimate, y = reorder(topic_str, -topic_num), xmin = ci.lower, xmax = ci.upper)) +
    geom_point(size = 1.5) +
    geom_errorbarh(height = 0.1) +
    xlab("")+
    ylab("")+
    geom_vline(xintercept = 0, color = "red") +
    scale_y_discrete(labels = label_wrap(30)) +
    facet_wrap(~ type, ncol = 2) +
    theme_bw()+
    theme(axis.text.y = element_text(size = 12),
          strip.text = element_text(size = 14))
  fig
  ggsave(fig, filename = "Figure(s)/election_topic_proportions.jpeg", width = 12, height = 8, dpi = 1000)

## 2.6 Robust Test ===========================================================
#LDA
nometa<-stm(documents=out$documents, 
                   vocab=out$vocab,
                   K=25, # Need to be changed
                   data=out$meta,
                   max.em.its = 75,
                   init.type = "Spectral", verbose=TRUE, seed=9999)

labelTopics(nometa)

## 2.7 Model Comparison ===========================================================
labelTopics(model10Prrateby) # k=20
labelTopics(model15Prrateby) # k=25
labelTopics(model20Prrateby) # k=30

#Liberal Democracy Crisis - All model produce similar
crisis_25 <- findThoughts(model15Prrateby, texts = meta$text, 
                          topics = 2, n = 10)$docs[[1]]
crisis_20 <- findThoughts(model10Prrateby, texts = meta$text, 
                          topics = 7, n = 10)$docs[[1]]
crisis_30 <- findThoughts(model20Prrateby, texts = meta$text, 
                          topics = 8, n = 10)$docs[[1]]
crisis_25
crisis_20
crisis_30

#People-Centrism  - All model produce similar
people_25 <- findThoughts(model15Prrateby, texts = meta$text, 
                          topics = 8, n = 10)$docs[[1]]
people_20 <- findThoughts(model10Prrateby, texts = meta$text, 
                          topics = 10, n = 10)$docs[[1]]
people_30 <- findThoughts(model20Prrateby, texts = meta$text, 
                          topics = 6, n = 10)$docs[[1]]
people_25
people_20
people_30

#Anti-immigration - All model produce similar
immi_25 <- findThoughts(model15Prrateby, texts = meta$text, 
                        topics = 22, n = 10)$docs[[1]]
immi_20 <- findThoughts(model10Prrateby, texts = meta$text, 
                        topics = 19, n = 10)$docs[[1]]
immi_30 <- findThoughts(model20Prrateby, texts = meta$text, 
                        topics = 24, n = 10)$docs[[1]]
immi_25
immi_20
immi_30

#Roots of Hungarian - All model produce similar
root_25 <- findThoughts(model15Prrateby, texts = meta$text, 
                        topics = 24, n = 10)$docs[[1]]
root_20 <- findThoughts(model10Prrateby, texts = meta$text, 
                        topics = 9, n = 10)$docs[[1]]
root_30 <- findThoughts(model20Prrateby, texts = meta$text, 
                        topics = 1, n = 10)$docs[[1]]
root_25
root_20
root_30

